{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "67c1e6b1",
   "metadata": {},
   "source": [
    "\n",
    "\n",
    "下面的例子将展示词向量标准工具包——gensim提供的词嵌入，并展示词嵌入如何表示词的相似度。\n",
    "<!-- https://nlp.stanford.edu/projects/glove/ -->"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "5c5a740a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pprint\n",
    "\n",
    "from gensim.models import KeyedVectors\n",
    "\n",
    "# Download the GloVe vectors from the official GloVe site; glove.6B.zip is used here.\n",
    "# Unzip the archive and change the path below to the location of the extracted file.\n",
    "# The file is loaded as plain text (binary=False) with no header line (no_header=True).\n",
    "model = KeyedVectors.load_word2vec_format('./data/glove.6B.100d.txt', binary=False, no_header=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "01a2e4a5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('movie', 0.9055122137069702),\n",
      " ('films', 0.8914434909820557),\n",
      " ('directed', 0.8124362826347351),\n",
      " ('documentary', 0.8075793385505676),\n",
      " ('drama', 0.7929168343544006),\n",
      " ('movies', 0.7889865040779114),\n",
      " ('comedy', 0.7842751145362854),\n",
      " ('starring', 0.7573286294937134),\n",
      " ('cinema', 0.7419455647468567),\n",
      " ('hollywood', 0.7307389974594116)]\n",
      "[('vehicle', 0.8630838394165039),\n",
      " ('truck', 0.8597878813743591),\n",
      " ('cars', 0.837166965007782),\n",
      " ('driver', 0.8185911178588867),\n",
      " ('driving', 0.7812634110450745),\n",
      " ('motorcycle', 0.7553157210350037),\n",
      " ('vehicles', 0.7462257146835327),\n",
      " ('parked', 0.74594646692276),\n",
      " ('bus', 0.737270712852478),\n",
      " ('taxi', 0.7155269384384155)]\n"
     ]
    }
   ],
   "source": [
    "# Use most_similar() to list the vocabulary words closest (most similar) to each query word\n",
    "for query_word in ('film', 'car'):\n",
    "    pprint.pprint(model.most_similar(query_word))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "8b62f7ad",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "japanese\n",
      "panda\n",
      "longest\n",
      "terrible\n",
      "queen\n"
     ]
    }
   ],
   "source": [
    "# 利用GloVe展示一个类比的例子\n",
    "def analogy(x1, x2, y1):\n",
    "    \"\"\"Answer the analogy 'x1 is to x2 as y1 is to ?' and return the best word.\n",
    "\n",
    "    Queries the global `model` through most_similar() with y1 and x2 as\n",
    "    positive terms and x1 as the negative term, then returns the word of\n",
    "    the top-ranked (word, score) pair.\n",
    "    \"\"\"\n",
    "    ranked = model.most_similar(negative=[x1], positive=[y1, x2])\n",
    "    best_word, _score = ranked[0]\n",
    "    return best_word\n",
    "\n",
    "# Each triple (x1, x2, y1) asks: x1 is to x2 as y1 is to ...?\n",
    "analogy_queries = [\n",
    "    ('china', 'chinese', 'japan'),\n",
    "    ('australia', 'koala', 'china'),\n",
    "    ('tall', 'tallest', 'long'),\n",
    "    ('good', 'fantastic', 'bad'),\n",
    "    ('man', 'woman', 'king'),\n",
    "]\n",
    "for query in analogy_queries:\n",
    "    print(analogy(*query))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0c308cee",
   "metadata": {},
   "source": [
    "下面将展示word2vec的代码，包括文本预处理、skipgram算法的实现、以及使用PyTorch进行优化。这里使用《小王子》这本书作为训练语料。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "590fc408",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 安装NLTK，使用如下代码下载punkt组件\n",
    "#import nltk\n",
    "#nltk.download('punkt')\n",
    "\n",
    "from nltk.tokenize import sent_tokenize, word_tokenize\n",
    "from collections import defaultdict\n",
    "\n",
    "# 使用类管理数据对象，包括文本读取、文本预处理等\n",
    "class TheLittlePrinceDataset:\n",
    "    \"\"\"Corpus wrapper for 'The Little Prince': file reading, tokenization and vocabulary.\n",
    "\n",
    "    Attributes (each is created by the method named in parentheses):\n",
    "        sentences, tokens: lower-cased sentences and their token lists (__init__, tokenize=True).\n",
    "        text: the raw file contents (__init__, tokenize=False).\n",
    "        frequency: token -> occurrence count (build_vocab).\n",
    "        token2id, id2token: vocabulary mappings (build_vocab).\n",
    "        token_ids: sentences as lists of token ids (convert_tokens_to_ids).\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, tokenize=True):\n",
    "        \"\"\"Read the corpus file and, if requested, split it with NLTK.\n",
    "\n",
    "        Args:\n",
    "            tokenize: when True, lower-case the text and store `self.sentences`\n",
    "                and `self.tokens`; when False, only store the raw `self.text`.\n",
    "        \"\"\"\n",
    "        # Context manager so the file handle is closed as soon as the text is read\n",
    "        with open('./data/the little prince.txt', 'r', encoding='utf-8') as corpus_file:\n",
    "            text = corpus_file.read()\n",
    "        if tokenize:\n",
    "            # Sentence splitting and word tokenization are delegated to NLTK\n",
    "            self.sentences = sent_tokenize(text.lower())\n",
    "            self.tokens = [word_tokenize(sent) for sent in self.sentences]\n",
    "        else:\n",
    "            self.text = text\n",
    "\n",
    "    def build_vocab(self, min_freq=1):\n",
    "        \"\"\"Count token frequencies and build the token <-> id mappings.\n",
    "\n",
    "        Args:\n",
    "            min_freq: a token enters the vocabulary only if its count is strictly\n",
    "                greater than this value, so the default of 1 drops tokens seen once.\n",
    "        \"\"\"\n",
    "        # Count how often each token occurs\n",
    "        frequency = defaultdict(int)\n",
    "        for sentence in self.tokens:\n",
    "            for token in sentence:\n",
    "                frequency[token] += 1\n",
    "        self.frequency = frequency\n",
    "\n",
    "        # <unk> stands for out-of-vocabulary tokens; <pad> is used to align\n",
    "        # variable-length inputs for faster batching\n",
    "        self.token2id = {'<unk>': 1, '<pad>': 0}\n",
    "        self.id2token = {1: '<unk>', 0: '<pad>'}\n",
    "        for token, freq in sorted(frequency.items(), key=lambda x: -x[1]):\n",
    "            # Items are sorted by descending count, so the first low-frequency\n",
    "            # token means every remaining token is low-frequency as well\n",
    "            if freq > min_freq:\n",
    "                self.token2id[token] = len(self.token2id)\n",
    "                self.id2token[len(self.id2token)] = token\n",
    "            else:\n",
    "                break\n",
    "\n",
    "    def get_word_distribution(self):\n",
    "        \"\"\"Return the normalized token-frequency distribution over the vocabulary.\n",
    "\n",
    "        Requires build_vocab() to have been called first.\n",
    "\n",
    "        Returns:\n",
    "            A numpy array of length len(self.token2id), indexed by token id and\n",
    "            summing to 1. Counts of tokens outside the vocabulary go to <unk>.\n",
    "        \"\"\"\n",
    "        # Use this instance's own vocabulary rather than the notebook-level\n",
    "        # globals `vocab_size` / `dataset`, which may not exist yet or may\n",
    "        # refer to a different dataset object\n",
    "        distribution = np.zeros(len(self.token2id))\n",
    "        for token, freq in self.frequency.items():\n",
    "            if token in self.token2id:\n",
    "                distribution[self.token2id[token]] = freq\n",
    "            else:\n",
    "                # Tokens outside the vocabulary are counted as <unk> (id 1)\n",
    "                distribution[1] += freq\n",
    "        distribution /= distribution.sum()\n",
    "        return distribution\n",
    "\n",
    "    def convert_tokens_to_ids(self, drop_single_word=True):\n",
    "        \"\"\"Convert the tokenized sentences into lists of token ids.\n",
    "\n",
    "        Args:\n",
    "            drop_single_word: when True, skip sentences that contain exactly one\n",
    "                token, because no loss can be computed for them.\n",
    "\n",
    "        Returns:\n",
    "            The list of id lists, which is also stored as `self.token_ids`.\n",
    "        \"\"\"\n",
    "        self.token_ids = []\n",
    "        for sentence in self.tokens:\n",
    "            # Unknown tokens map to id 1, i.e. <unk>\n",
    "            token_ids = [self.token2id.get(token, 1) for token in sentence]\n",
    "            if len(token_ids) == 1 and drop_single_word:\n",
    "                continue\n",
    "            self.token_ids.append(token_ids)\n",
    "\n",
    "        return self.token_ids\n",
    "\n",
    "# Load and tokenize the corpus, build the vocabulary, then map every sentence to token ids.\n",
    "dataset = TheLittlePrinceDataset()\n",
    "# With min_freq=1, build_vocab keeps only tokens that occur more than once.\n",
    "dataset.build_vocab(min_freq=1)\n",
    "sentences = dataset.convert_tokens_to_ids()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "efc882de",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(76044, 2) [[  4  16]\n",
      " [  4  19]\n",
      " [ 16   4]\n",
      " ...\n",
      " [130   3]\n",
      " [  3  86]\n",
      " [  3 130]]\n"
     ]
    }
   ],
   "source": [
    "# Enumerate every (center word, context word) pair inside the window\n",
    "window_size = 2\n",
    "data = []\n",
    "\n",
    "for sentence in sentences:\n",
    "    sentence_len = len(sentence)\n",
    "    for center_pos, center_word in enumerate(sentence):\n",
    "        # Clip the window to the sentence boundaries\n",
    "        first = max(0, center_pos - window_size)\n",
    "        last = min(sentence_len, center_pos + window_size + 1)\n",
    "        for context_pos in range(first, last):\n",
    "            # A word is never its own context\n",
    "            if context_pos != center_pos:\n",
    "                data.append([center_word, sentence[context_pos]])\n",
    "\n",
    "# numpy must be installed beforehand\n",
    "import numpy as np\n",
    "data = np.array(data)\n",
    "print(data.shape, data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "30903b3d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 需要提前安装PyTorch\n",
    "import torch\n",
    "from torch import nn\n",
    "import torch.nn.functional as F\n",
    "\n",
    "# 实现skipgram算法，使用对比学习计算损失\n",
    "class SkipGramNCE(nn.Module):\n",
    "    \"\"\"Skip-gram model trained with a negative-sampling contrastive loss.\n",
    "\n",
    "    Args:\n",
    "        vocab_size: number of rows in each of the two embedding tables.\n",
    "        embed_size: dimensionality of the embeddings.\n",
    "        distribution: array of per-token-id sampling weights; it is raised to\n",
    "            the power 0.75 and renormalized before negatives are drawn from it.\n",
    "        neg_samples: number of negative words drawn per training pair.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, vocab_size, embed_size, distribution,\n",
    "                 neg_samples=20):\n",
    "        super().__init__()\n",
    "        print(f'vocab_size = {vocab_size}, embed_size = {embed_size}, '\n",
    "              f'neg_samples = {neg_samples}')\n",
    "        # Separate tables for center words (input) and context words (output)\n",
    "        self.input_embeddings = nn.Embedding(vocab_size, embed_size)\n",
    "        self.output_embeddings = nn.Embedding(vocab_size, embed_size)\n",
    "        # np.power returns a new array, so the caller's array is left untouched\n",
    "        distribution = np.power(distribution, 0.75)\n",
    "        distribution /= distribution.sum()\n",
    "        self.distribution = torch.tensor(distribution)\n",
    "        self.neg_samples = neg_samples\n",
    "\n",
    "    def forward(self, input_ids, labels):\n",
    "        \"\"\"Return the mean negative-sampling loss of a batch as a scalar tensor.\n",
    "\n",
    "        Args:\n",
    "            input_ids: 1-D tensor of center-word ids, shape (batch,).\n",
    "            labels: 1-D tensor of context-word ids, shape (batch,).\n",
    "        \"\"\"\n",
    "        i_embed = self.input_embeddings(input_ids)   # (batch, embed)\n",
    "        o_embed = self.output_embeddings(labels)     # (batch, embed)\n",
    "        batch_size = i_embed.size(0)\n",
    "        # Draw neg_samples negative word ids per example -> (batch, neg_samples)\n",
    "        n_words = torch.multinomial(\n",
    "            self.distribution, batch_size * self.neg_samples, replacement=True\n",
    "        ).view(batch_size, -1)\n",
    "        n_embed = self.output_embeddings(n_words)    # (batch, neg_samples, embed)\n",
    "        pos_term = F.logsigmoid(torch.sum(i_embed * o_embed, dim=1))\n",
    "        # Negative samples for the contrastive term. Squeeze only the trailing\n",
    "        # dimension: a bare squeeze() would also drop a batch or neg_samples\n",
    "        # dimension of size 1 and make the sum over dim=1 fail.\n",
    "        neg_scores = torch.bmm(n_embed, i_embed.unsqueeze(2)).squeeze(2)\n",
    "        neg_term = torch.sum(F.logsigmoid(-neg_scores), dim=1)\n",
    "        loss = -torch.mean(pos_term + neg_term)\n",
    "        return loss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "1d9da6c8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.00000000e+00 5.43983724e-02 5.34295679e-02 ... 9.68804495e-05\n",
      " 9.68804495e-05 9.68804495e-05]\n",
      "vocab_size = 1078, embed_size = 128, neg_samples = 20\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "epoch-99, loss=2.9060: 100%|█| 100/100 [04:59<00:00,  3.00s/\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjIAAAGwCAYAAACzXI8XAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjEsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvc2/+5QAAAAlwSFlzAAAPYQAAD2EBqD+naQAASSdJREFUeJzt3Xd0lGX+/vFrJpOZ9AoklIQuHekQEEFBQVxEZdeGioodVGSta/u5q8JXXTuyVnBdXdsq9oKoFKWGJr0FCJCEEtL7zPP7I8lAhEDKzDyT8H6dk3PIM89MPrkJmYu7WgzDMAQAANAAWc0uAAAAoK4IMgAAoMEiyAAAgAaLIAMAABosggwAAGiwCDIAAKDBIsgAAIAGy2Z2Ad7mcrm0f/9+hYeHy2KxmF0OAACoAcMwlJubqxYtWshqrb7fpdEHmf379yshIcHsMgAAQB2kpqaqVatW1T7e6INMeHi4pPKGiIiIMLkaAABQEzk5OUpISHC/j1en0QeZyuGkiIgIggwAAA3MqaaFMNkXAAA0WAQZAADQYBFkAABAg0WQAQAADRZBBgAANFgEGQAA0GARZAAAQINFkAEAAA0WQQYAADRYBBkAANBgEWQAAECDRZABAAANVqM/NNJbcotKlVVQqjCHTdGhdrPLAQDgtESPTB39/cuNGvr0z/rvij1mlwIAwGmLIFNHoY7yzqz84jKTKwEA4PRFkKmjMHeQcZpcCQAApy+CTB1V9sjk0SMDAIBpCDJ1FOYIkMTQEgAAZiLI1BE9MgAAmI8gU0dhBBkAAExHkKmjMFYtAQBgOoJMHYWyagkAANMRZOqIOTIAAJiPIFNHx86RMQzD5GoAADg9EWTqKLRi+bXTZai4zGVyNQAAnJ4IMnUUaj963ibDSwAAmIMgU0dWq0WhdjbFAwDATASZemDCLwAA5iLI1AMHRwIAYC6CTD0c7ZEpNbkSAABOTwSZeji6BJseGQAAzECQqYdQjikAAMBUBJl6CHOwagkAADMRZOqBVUsAAJiLIFMP7jkyRQQZAADMQJCpB/ccmRKCDAAAZiDI1AOrlgAAMBdBph7CWLUEAICpCDL1wGRfAADM5TdBZsaMGbJYLJo6dar7WlFRkSZPnqzY2FiFhYVp/PjxysjIMK/IPwhl+TUAAKbyiyCzYsUKvfbaa+rZs2eV63fffbe+/PJLffzxx1qwYIH279+vSy+91KQqjxdGjwwAAKYyPcjk5eVpwoQJeuONNxQdHe2+np2drbfeekvPPfeczj33XPXt21ezZ8/Wb7/9pqVLl5pY8VHs7AsAgLlMDzKTJ0/WhRdeqJEjR1a5npycrNLS0irXO3furMTERC1ZsqTa1ysuLlZOTk6VD2+hRwYAAHPZzPziH3zwgVatWqUVK1Yc91h6errsdruioqKqXI+Li1N6enq1rzl9+nQ9/vjjni71hCqDTFGpS2VOl2wBpudCAABOK6a986ampuquu+7Se++9p6CgII+97oMPPqjs7Gz3R2pqqsde+48qh5YkKb+EvWQAAPA104JMcnKyDhw4oD59+shms8lms2nBggV66aWXZLPZFBcXp5KSEmVlZVV5XkZGhuLj46t9XYfDoYiIiCof3mK3WWWv6IVheAkAAN8zbWhpxIgR+v3336tcu/7669W5c2fdf//9SkhIUGBgoObPn6/x48dLkrZs2aI9e/YoKSnJjJJPKNQRoJICFxN+AQAwgWlBJjw8XN27d69yLTQ0VLGxse7rkyZN0rRp0xQTE6OIiAjdcccdSkpK0qBBg8wo+YTCgmw6UlBKjwwAACYwdbLvqTz//POyWq0aP368iouLNWrUKL366qtml1VFqJ0l2AAAmMWvgswvv/xS5fOgoCDNnDlTM2fONKegGuC8JQAAzMN64XoK5QRsAABMQ5CpJ/emeEWlJlcCAMDphyBTT+6DI9lHBgAAnyPI1FOYI1AS+8gAAGAGgkw9hVX2yBBk
AADwOYJMPYVycCQAAKYhyNSTO8gUEWQAAPA1gkw9ufeRKSHIAADgawSZemIfGQAAzEOQqSd29gUAwDwEmXoiyAAAYB6CTD1VbojHqiUAAHyPIFNPx/bIGIZhcjUAAJxeCDL1VDnZ12VIhaVM+AUAwJcIMvUUYg+QxVL+Z4aXAADwLYJMPVksFoXZK4eX6JEBAMCXCDIeEMrKJQAATEGQ8YDKlUu5HFMAAIBPEWQ8gL1kAAAwB0HGA0I5bwkAAFMQZDwgzH3eEkEGAABfIsh4AENLAACYgyDjAZyADQCAOQgyHuAOMqxaAgDApwgyHhBWsfyaoSUAAHyLIOMB7h4ZVi0BAOBTBBkPYLIvAADmIMh4AEEGAABzEGQ8oHJoiSMKAADwLYKMB7CzLwAA5iDIeMDRoSX2kQEAwJcIMh4QFsQRBQAAmIEg4wFh9vIgU1LmUqnTZXI1AACcPggyHhBasSGexMolAAB8iSDjAbYAqxy28qZk5RIAAL5DkPGQMFYuAQDgcwQZDwllUzwAAHyOIOMhlT0yeSzBBgDAZwgyHsIxBQAA+B5BxkMqVy7lMdkXAACfIch4SKiDTfEAAPA1goyHMLQEAIDvEWQ8xN0jw/JrAAB8hiDjIfTIAADgewQZD+EEbAAAfI8g4yGVQ0scUQAAgO8QZDykcvk1Q0sAAPgOQcZDOGsJAADfI8h4CPvIAADgewQZD2HVEgAAvkeQ8RD3oZFM9gUAwGcIMh4S6p4j45TLZZhcDQAApweCjIeEB9ncf2Z3XwAAfIMg4yFBgQEKDixfgn0kv8TkagAAOD0QZDwoJtQuSTpMkAEAwCcIMh4UG1YeZDLzCDIAAPgCQcaDKntkMumRAQDAJwgyHuQOMgUEGQAAfIEg40ExIfTIAADgSwQZD4qpmCNzmDkyAAD4BEHGg2Ldc2SKTa4EAIDTA0HGg2JCHZIYWgIAwFcIMh7EPjIAAPgWQcaDYll+DQCATxFkPKhysm9BiVNFpU6TqwEAoPEjyHhQuMOmwACLJHplAADwBYKMB1ksFkWzlwwAAD5DkPEwJvwCAOA7BBkPcx8cyV4yAAB4HUHGwyr3kmF3XwAAvI8g42EswQYAwHdMDTKzZs1Sz549FRERoYiICCUlJenbb791P15UVKTJkycrNjZWYWFhGj9+vDIyMkys+NRiCDIAAPiMqUGmVatWmjFjhpKTk7Vy5Uqde+65GjdunDZs2CBJuvvuu/Xll1/q448/1oIFC7R//35deumlZpZ8Skz2BQDAd2xmfvGxY8dW+fzJJ5/UrFmztHTpUrVq1UpvvfWW3n//fZ177rmSpNmzZ6tLly5aunSpBg0adMLXLC4uVnHx0Ym2OTk53vsGTqAyyBwhyAAA4HV+M0fG6XTqgw8+UH5+vpKSkpScnKzS0lKNHDnSfU/nzp2VmJioJUuWVPs606dPV2RkpPsjISHBF+W7MbQEAIDvmB5kfv/9d4WFhcnhcOjWW2/VZ599pq5duyo9PV12u11RUVFV7o+Li1N6enq1r/fggw8qOzvb/ZGamurl76CqWIaWAADwGVOHliSpU6dOWrNmjbKzs/XJJ59o4sSJWrBgQZ1fz+FwyOFweLDC2qnskckuLFWp06XAANOzIgAAjZbpQcZut6tDhw6SpL59+2rFihV68cUXdfnll6ukpERZWVlVemUyMjIUHx9vUrWnFhVil8UiGYZ0pKBEzcKDzC4JAIBGy++6C1wul4qLi9W3b18FBgZq/vz57se2bNmiPXv2KCkpycQKTy7AynlLAAD4iqk9Mg8++KAuuOACJSYmKjc3V++//75++eUXff/994qMjNSkSZM0bdo0xcTEKCIiQnfccYeSkpKqXbHkL2JC7crML1Emu/sCAOBVpgaZAwcO6Nprr1VaWpoiIyPVs2dPff/99zrvvPMkSc8//7ysVqvGjx+v4uJijRo1Sq+++qqZJddITAgTfgEA8AVTg8xbb7110seDgoI0c+ZMzZw5
00cVeYZ7L5kCggwAAN7kd3NkGoOYihOwOTgSAADvIsh4AQdHAgDgGwQZL2B3XwAAfIMg4wVHD44sPsWdAACgPggyXhAbWr6zMD0yAAB4F0HGC6JDAyURZAAA8DaCjBdU9sgcKSiVy2WYXA0AAI0XQcYLKntknC5DOUWlJlcDAEDjRZDxAoctQOGO8r0G2d0XAADvIch4SeWmeMyTAQDAewgyXuJegs3uvgAAeA1BxkvY3RcAAO8jyHjJ0d192RQPAABvIch4SbR7d196ZAAA8BaCjJdUDi0dIcgAAOA1BBkvianYFI8eGQAAvIcg4yVM9gUAwPsIMl4SQ5ABAMDrCDJeEnPMZF/D4LwlAAC8gSDjJbEVO/uWlLmUX+I0uRoAABongoyXBAcGyGErb95MdvcFAMArCDJeYrFY3BN+D7MpHgAAXkGQ8aLKgyOPFNAjAwCANxBkvMi9lwxDSwAAeAVBxovYSwYAAO8iyHgRe8kAAOBdBBkviuHgSAAAvIog40UxHBwJAIBXEWS8iB4ZAAC8iyDjRe4eGZZfAwDgFQQZL3JP9mX5NQAAXkGQ8aLK5de5xWUqLuO8JQAAPI0g40URQYEKsFokSVkFpSZXAwBA40OQ8SKr1aLokEBJ7O4LAIA3EGS8LDqECb8AAHgLQcbLWIINAID3EGS8jE3xAADwHoKMl9EjAwCA9xBkvOzoCdjFJlcCAEDjQ5Dxsmj30BLLrwEA8DSCjJcdHVqiRwYAAE+rU5B555139PXXX7s/v++++xQVFaXBgwdr9+7dHiuuMYihRwYAAK+pU5B56qmnFBwcLElasmSJZs6cqaefflpNmjTR3Xff7dECGzom+wIA4D22ujwpNTVVHTp0kCTNnTtX48eP180336whQ4Zo+PDhnqyvwTv2BGzDMGSxWEyuCACAxqNOPTJhYWE6fPiwJOmHH37QeeedJ0kKCgpSYWGh56prBCqDjNNlKKewzORqAABoXOrUI3PeeefpxhtvVO/evbV161aNGTNGkrRhwwa1adPGk/U1eA5bgMIcNuUVl+lwfrEiK85eAgAA9VenHpmZM2cqKSlJBw8e1P/+9z/FxsZKkpKTk3XllVd6tMDGIDq0PLxw3hIAAJ5Vpx6ZqKgovfLKK8ddf/zxx+tdUGMUE+pQamYhJ2ADAOBhdeqR+e6777R48WL35zNnzlSvXr101VVX6ciRIx4rrrGICaFHBgAAb6hTkLn33nuVk5MjSfr999/117/+VWPGjFFKSoqmTZvm0QIbg5hQhySWYAMA4Gl1GlpKSUlR165dJUn/+9//9Kc//UlPPfWUVq1a5Z74i6NiwzgBGwAAb6hTj4zdbldBQYEk6ccff9T5558vSYqJiXH31OCo6BA2xQMAwBvq1CNz1llnadq0aRoyZIiWL1+uDz/8UJK0detWtWrVyqMFNgZHT8AmyAAA4El16pF55ZVXZLPZ9Mknn2jWrFlq2bKlJOnbb7/V6NGjPVpgY3D0BGyCDAAAnlSnHpnExER99dVXx11//vnn611QY8R5SwAAeEedgowkOZ1OzZ07V5s2bZIkdevWTRdddJECAgI8VlxjEUOPDAAAXlGnILN9+3aNGTNG+/btU6dOnSRJ06dPV0JCgr7++mu1b9/eo0U2dJVBJr/EqaJSp4ICCXsAAHhCnebI3HnnnWrfvr1SU1O1atUqrVq1Snv27FHbtm115513errGBi8iyCabtfzUazbFAwDAc+rUI7NgwQItXbpUMTEx7muxsbGaMWOGhgwZ4rHiGguLxaLoULsO5hbrcF6JmkcGm10SAACNQp16ZBwOh3Jzc4+7npeXJ7vdXu+iGqPKJdj0yAAA4Dl1CjJ/+tOfdPPNN2vZsmUyDEOGYWjp0qW69dZbddFFF3m6xkahclM89pIBAMBz6hRkXnrpJbVv315JSUkKCgpSUFCQBg8erA4dOuiFF17wcImNQ0zFMQWcgA0AgOfUaY5MVFSUPv/8c23fvt29
/LpLly7q0KGDR4trTGJCGFoCAMDTahxkTnWq9c8//+z+83PPPVf3ihopNsUDAMDzahxkVq9eXaP7LBZLnYtpzDgBGwAAz6txkDm2xwW1xwnYAAB4Xp0m+6L2YjmmAAAAjyPI+EjlCdgsvwYAwHMIMj5y7IZ4LpdhcjUAADQOBBkfiaqYI+MypOzCUpOrAQCgcTA1yEyfPl39+/dXeHi4mjVrposvvlhbtmypck9RUZEmT56s2NhYhYWFafz48crIyDCp4rqz26wKDyqfW82EXwAAPMPUILNgwQJNnjxZS5cu1bx581RaWqrzzz9f+fn57nvuvvtuffnll/r444+1YMEC7d+/X5deeqmJVdcd5y0BAOBZddrZ11O+++67Kp/PmTNHzZo1U3Jyss4++2xlZ2frrbfe0vvvv69zzz1XkjR79mx16dJFS5cu1aBBg8wou86iQ+3adbiAYwoAAPAQv5ojk52dLUmKiYmRJCUnJ6u0tFQjR45039O5c2clJiZqyZIlJ3yN4uJi5eTkVPnwF/TIAADgWX4TZFwul6ZOnaohQ4aoe/fukqT09HTZ7XZFRUVVuTcuLk7p6eknfJ3p06crMjLS/ZGQkODt0muME7ABAPAsvwkykydP1vr16/XBBx/U63UefPBBZWdnuz9SU1M9VGH9cQI2AACeZeocmUpTpkzRV199pYULF6pVq1bu6/Hx8SopKVFWVlaVXpmMjAzFx8ef8LUcDoccDoe3S64ThpYAAPAsU3tkDMPQlClT9Nlnn+mnn35S27Ztqzzet29fBQYGav78+e5rW7Zs0Z49e5SUlOTrcuuN85YAAPAsU3tkJk+erPfff1+ff/65wsPD3fNeIiMjFRwcrMjISE2aNEnTpk1TTEyMIiIidMcddygpKanBrViSOAEbAABPMzXIzJo1S5I0fPjwKtdnz56t6667TpL0/PPPy2q1avz48SouLtaoUaP06quv+rhSz2CyLwAAnmVqkDGMU585FBQUpJkzZ2rmzJk+qMi7YkPL5+4QZAAA8Ay/WbV0OogODZQkFZY6VVjiNLkaAAAaPoKMD4U5bLIHlDf54fxik6sBAKDhI8j4kMViUdPw8uGlA7kEGQAA6osg42PxkUGSpIzsIpMrAQCg4SPI+Fh8RHmQSSPIAABQbwQZH6vskUnPIcgAAFBfBBkfax5JjwwAAJ5CkPEx5sgAAOA5BBkfc8+RySk0uRIAABo+goyPHe2RKa7RzsYAAKB6BBkfaxYeJItFKnG6OKoAAIB6Isj4mN1mdZ+5xIRfAADqhyBjgsqVS+kEGQAA6oUgY4LKeTJp7CUDAEC9EGRM0Jwl2AAAeARBxgRxHFMAAIBHEGRM4J4jw14yAADUC0HGBPFM9gUAwCMIMiZoHhksqXxoiU3xAACoO4KMCSqPKSgocSq3uMzkagAAaLgIMiYItgcoMjhQEsNLAADUB0HGJGyKBwBA/RFkTFK5BJsgAwBA3RFkTFLZI8NeMgAA1B1BxiTx7CUDAEC9EWRMwhwZAADqjyBjEo4pAACg/ggyJqncFC+dE7ABAKgzgoxJKufIZBWUqqjUaXI1AAA0TAQZk0QE2RRiD5DEPBkAAOqKIGMSi8XiPqqAeTIAANQNQcZELMEGAKB+CDImcgeZ7GKTKwEAoGEiyJgo3n1MAT0yAADUBUHGRBxTAABA/RBkTBRfsZdMBnvJAABQJwQZE9EjAwBA/RBkTFR5TMHBvGKVOl0mVwMAQMNDkDFRbKhdgQEWGYZ0IJeVSwAA1BZBxkRWq8XdK8PuvgAA1B5BxmTNIwkyAADUFUHGZHHuYwrYSwYAgNoiyJiMHhkAAOqOIGOyyr1k0thLBgCAWiPImKxpuEOSdDiPVUsAANQWQcZkMSF2SdKR/FKTKwEAoOEhyJgsOjRQkpRZUGJyJQAANDwEGZPFhFb2yJTIMAyTqwEAoGEhyJgsumJoqcxlKLe4zORqAABoWAgyJgsKDFCIPUBSea8MAACo
OYKMH6jslTlSwIRfAABqgyDjByon/NIjAwBA7RBk/EBlj0wmQQYAgFohyPgB98ollmADAFArBBk/QI8MAAB1Q5DxA/TIAABQNwQZPxAdSo8MAAB1QZDxAzEsvwYAoE4IMn4gOoTl1wAA1AVBxg9EM0cGAIA6Icj4gaOTfUvlcnFwJAAANUWQ8QNRFUNLTpeh3CIOjgQAoKYIMn7AYQtQmMMmScpkeAkAgBojyPiJyvOWWIINAEDNEWT8ROUS7Cx6ZAAAqDGCjJ+I4pgCAABqjSDjJzimAACA2iPI+ImjB0eyuy8AADVFkPETMaHs7gsAQG0RZPyE++BIhpYAAKgxgoyfcB8cSY8MAAA1ZmqQWbhwocaOHasWLVrIYrFo7ty5VR43DEOPPvqomjdvruDgYI0cOVLbtm0zp1gv47wlAABqz9Qgk5+frzPPPFMzZ8484eNPP/20XnrpJf3rX//SsmXLFBoaqlGjRqmoqMjHlXpf5WTfIwVM9gUAoKZsZn7xCy64QBdccMEJHzMMQy+88IIefvhhjRs3TpL073//W3FxcZo7d66uuOKKEz6vuLhYxcXF7s9zcnI8X7gXVO7sm1VQIqfLUIDVYnJFAAD4P7+dI5OSkqL09HSNHDnSfS0yMlIDBw7UkiVLqn3e9OnTFRkZ6f5ISEjwRbn1Vtkj4zKknEJ6ZQAAqAm/DTLp6emSpLi4uCrX4+Li3I+dyIMPPqjs7Gz3R2pqqlfr9JTAAKvCgzg4EgCA2jB1aMkbHA6HHA6H2WXUSUyoXblFZeUrl5qaXQ0AAP7Pb3tk4uPjJUkZGRlVrmdkZLgfa2yiOW8JAIBa8dsg07ZtW8XHx2v+/Pnuazk5OVq2bJmSkpJMrMx7Ks9bymLlEgAANWLq0FJeXp62b9/u/jwlJUVr1qxRTEyMEhMTNXXqVD3xxBPq2LGj2rZtq0ceeUQtWrTQxRdfbF7RXhQVUr5yiTkyAADUjKlBZuXKlTrnnHPcn0+bNk2SNHHiRM2ZM0f33Xef8vPzdfPNNysrK0tnnXWWvvvuOwUFBZlVslexuy8AALVjapAZPny4DMOo9nGLxaK///3v+vvf/+7DqszjPm+pBkGmpMyl9OwiJcaGeLssAAD8lt/OkTkdxdTimILHv9ygs5/5WW8u2untsgAA8FsEGT9Sm1VLa/dmSZKe/GaT5m/KOPnNAAA0UgQZP3K0R+bUq5b2Z5WfN2UY0p3/Xa3N6Q3jKAYAADyJIONHYirOWzrV0FJBSZm716Z3YpTyS5yaNGelDuUVn/R5AAA0NgQZPxJVMbSUXViqMqer2vsqe2PCHTbNvq6/2sSGaF9WoW7+90oVlTp9UisAAP6AIONHooLLe2QMozzMVGd/VqEkqUVUsKJC7Hrruv6KCLJp1Z4sPTx3vU9qBQDAHxBk/IgtwKrI4FMPLx0NMuX76bRvGqZZV/eV1SJ9krxXi7cd8n6xAAD4AYKMn4lx7yVTsx6ZSkM6NNG1SW0kSY9+sV4lZdUPTQEA0FgQZPxMdOUxBSdZgr2vYo7MsUFGku4+7ww1CbNr58F8vf1riveKBADATxBk/ExNNsWr7JFp+YcgExkcqAcu6CJJemn+NqVlF3qpSgAA/ANBxs/UZFO8/dnHDy1VurR3S/VtHa2CEqee/HqTd4oEAMBPEGT8TOV5S1nV9Mi4XIbS3ENLxx+eabVa9Pdx3WS1SF+tS9NvO5j4CwBovAgyfuZoj8yJJ/seyi9WidMlq0WKizjxKeDdWkTqmkGtJUmPfb5BpSfZkwYAgIaMIONnTrW7b+VmeHERQQoMqP6vb9r5nRQbate2A3n6JHmv5wsFAMAPEGT8zKnmyJxo6fWJRAYH6vohbSRJv2w54LkCAQDwIwQZP3OqVUs1DTKSlNQ+VpK0PCVTLpfhoQoBAPAfBBk/Ex168h6ZfdUsvT6RHi2jFBRo1ZGCUm07kOe5IgEA8BMEGT8TUzG0lFtUdsJJukf3kDnxRN9j2W1W
9WsdI0lalnLYg1UCAOAfCDJ+JiI4UBZL+Z+zCo5fubS/ml19qzOwbUWQ2ZnpmQIBAPAjBBk/E2C1uE/BPtE8mdrMkZGkge3K58ksSzksw2CeDACgcSHI+KHKeTIHcoqrXC8qdepwxdyZmgaZMxMi5bBZdSivRDsOMk8GANC4EGT8UOf4cEnS2r1ZVa5X9saEOWyKCLLV6LUctgD1ToySJC1leAkA0MgQZPxQ/zaVE3SrBo997mGlIFkqJ9LUwCD38BJBBgDQuBBk/NCAigm6ybsyVXbMyqXazo+pNLBtRZDZyTwZAEDjQpDxQ53jIxQeZFN+iVOb0nLd1/fVcsVSpd6JUbIHWHUgt1i7DhdUeSy/uEzvLdt90tO2AQDwVwQZPxRgtRwzvHR0/5f9tdgM71hBgQHqlRAlSVq68+jrGYahaR+t0UOfrdcz32+uZ9UAAPgeQcZPVQ4vLT9mXsv+Y+bI1NbAdpX7yRwNMp+v2a/vN2RIkn7efJBhJwBAg0OQ8VOVQWbFrqPnJLmDTGTtemSkqhN+DcNQRk6RHv18vfvx9JwibecYAwBAA0OQ8VPdW0QqODBARwpKtf1gnlwuQ/uz6zZHRpL6JEYrMMCitOwipWYW6oH/rVNOUZl6tIzU4IrDJRdtO1SvmvdlFaqwxFmv1wAAoDYIMn7KbrO6939ZlpKpw/klKilzyWKR4iNrP7QUbA9Qz1blr/fgZ+v085aDsgdY9c/LztQ5nZpJkhZtO1jnepN3H9Gwp3/W1W8tk5OTtgEAPkKQ8WPHzpOpHFaKCw9SYEDd/toqz136dXv5PJlp55+hM+LCNfSMJpLKN8wrLqtbj8rLP21TmctQ8u4jenfJrjq9BgAAtUWQ8WNHg8zhKpvh1VXluUuS1CcxSjcNbSdJ6hQXrqbhDhWWOpW8+0itX3fD/mz9suVob86zP2xVWnZhnesEAKCmCDJ+rHdC+byWjJxi97LpusyPqdSvdbQigmwKDgzQs385UwHW8t2BLRaLhnYo75WpyzyZWb/skCRd2LO5eiVEKa+4TI9/sbHOdQIAUFMEGT927LyWr9alSar9HjLHCnXY9OUdZ+n7qWerXdOwKo9VDi/Vdp7MrkP5+ub38tomD++g6Zf2UIDVou82pGvexow61woAQE0QZPxc5fBSZi1Pva5O69hQJcaGHHd9SEWPzPp9OTqcV3zc49V5beFOuQzpnE5N1bVFhLo0j9CNQ9tKkh77fL3yi8vqVS8AACdDkPFzlUGmUn2DTHWahQepS/MISdLi7TUbXsrIKdL/kvdKkm4b3sF9/a4RHdUqOlj7s4v0/Lytni8WAIAKBBk/17d1tKzHHHRdn8m+p3J2x9rNk3l7cYpKnC71ax1dJXCF2G36x8Xdy+/5NUXjZ/2mBz9dp7cXp2jxtkMn3Wsmp6hU17y1TJe/tkTr9mbV/ZsBAJwWbGYXgJOLCApUl+YR2rA/R1L95sicytCOTfXawp1atK38uAKLxVLtvdkFpfrP0t2SpNvPaX/c4+d0aqYrByTov8tTlbz7SJXVUK2ig/XejQPVOja0ynOKy5y65d/JWlIxsXnczF919cDWumdUJ0UGB3riWwQANDL0yDQAlb0dIfYAr76h92sTLYfNqoycYm07yXEFTpehV37epvwSpzrHh7s31Pujpy7poe+mDtWLV/TSlHM66PyucWoSZtfeI4W67LUlVY5EcLkM3fPxOi3ZeVih9gCN7hYvw5DeXbpbI/75iz5bvZezoAAAx6FHpgEY1C5Ws3/dpcSYkJP2ktRXUGCABraL1cKtB7Vw60GdERde5fEyp0tfrUvTyz9t046D+ZKk24a3r7Ymi8WizvER6hwf4b52ILdIV7+5TFsz8nT5a0v07qSB6toiQjO+26wv1+6XzWrRv67pq6Edm+q37Yf08OfrtfNgvu7+cK0WbT2k//tzzzpvCAgAaHwsRiP/b25OTo4iIyOVnZ2tiIiIUz/BD7lcht5cvFN9W0er
b+uYUz+hHt5YuFNPfrNJw85oqrev66/DecVKyy7SxrQcvb5wp1IOlQeYyOBA3TqsvW4d1q7W4Sozv0TXvr1M6/flKCLIpkv7tNKc33ZJkp677Exd2qeV+97iMqfeWLhTz/+4TU6XoZFd4vTKVb0VFBjgse8ZAOB/avr+TZBBFZvTczT6hUWyWKQAi0Vlfzg3KTokUDcObadrk1orPKjuw1zZhaW6fvZyrdqT5b523+hOuv2Y1U/H+nFjhm5/f5VKylxKaherNyb2U5iDDkUAaKxq+v5NHz2q6BQXrvZNQ2UYUpnLkNUiNY8MUu/EKN0/urMW3X+uJp/ToV4hRirv0Xl30kAlVRybMDGptW4bdvyk4Uoju8bpnesHKMxh05Kdh3XVG0vde+vAc4rLnHp3yS732V51VVLm0usLd+jnLQc8VNnpYdWeI3rnt10qKGH/JaCm6JHBcbILSrXrcL7iIoLUJMwumxfnpDhdhlIO5at909AaDVGt25uliW8v15GCUnVoFqbZ1/VXQszxG/x50qG8Yq3clakWUcHq2CxcwfbGO6w1/dtNem3BTnWKC9dXd55Vp/lI+cVluvU/yVq07ZCsFumdGwZoaMemXqjWfxzJL1Gp06VmEXXbHiEtu1Azvt2sz9fslyS1iQ3RPy870+tDyfB/TpfhPk7mdMPQUgWCTOOz/UCurn5zudJzitQkzKG3JvbTmQlRHv86ew4X6I1FO/XRylQVl7kkSVZL+e7IneLCdUGPeI3r1dLjX9cs+7IKdc6zv6ik4nu9d1QnTT7n+KG+3KJSvTR/m9o0CdWf+7aSw3Y02GXml+j6OSu0NjXLfS0yOFCfTx6iNk1Cj3stXzvVtgJ1cTivWBe8uEj5xWWaO3mIOv5hkvzJFJU69eainZr58w4VljplsUhRwYE6UlAqq0W6+ez2uvu8jlXauDHYc7hAL87fph83ZeiWYe1027DqFw14Qm5RqZ6ft03xkQ5dPai1Quz+PyxdXObUQ5+t13fr0/XExd11ce/G87umpggyFQgyjVNadqGun71Cm9NzFRRo1YtX9NaobvEeee3N6Tl69ecd+mrdflVOEWrXNFTZBaU6/IfhrIfGdNFNZ7fzyNc127SP1ujTVfvUNNyhg7nFstus+u6uoVXO5SpzujTpnZVasLX8TK64CIduObu9rhyQqCMFJbrmrWXacTBfUSGBeu3qvpr+7WatSc1Sx2Zh+vT2wfUekqwtwzC0bm+2vt+Qrh82Zig9u0iPje2qv/RLqPFr/LQ5Q8GBNiW1jz3uMcMwdNt/Vum7DemSpC7NI/TZ7YNPORnd5TL01e9pevq7zdp7pHwYr1/raD02tpsSY0P0+Jcb9OmqfZKkzvHhev7yXu6dtxuyfVmFenn+Nn2SvLfK/LuLe7XQjPE9j2u3kjKXfttxSKlHCnUgp0jp2UXKyC2Wy2WoXdNQdWgWpg5Nw9QhLkzNwk/cG5aRU6TrZq/QprTyvbiahNl167D2unpQ6zotGli07aDKnIbO6XzibSc8ITO/RLe8u1IrdpXvv2W1SC9f2UcX9mzuta/pjwgyFQgyjVdecZkmv7dKC7YelMVSHiqGd2qmjWk52rg/RxvTclRU4tRZHZtoZJc4dWkefspN/p75YbPeW7ZHlf8qhp3RVLcNb6+BbWNksVh0MLdYW9Jz9eOmDPdKq7+P66Zrk9rUqObM/BLtzypUenaR0nOKdCCnSMF2m64akKjIEPM2/du4P0cXvrxIhiHNnTxEz83bqoVbD2pQuxj996ZBslgsMgxDj3y+Xv9ZukdBgVZFh9iVll0kqfzNIcBaflJ788ggvTtpgDo0C9eBnCKNfWWxMnKKNbJLM71+TT9ZT9FN7nIZ2nYgT8tTDmv5riNauStTUSF2PXFxd/VtHX3c/UWlTr3w4zYt2XFIQYEBCnXYFGIPkM1q0bKUTHeNx7qif4L+30XdTvlG9p+lu/Xw3PWyWKQXLu91XA/cZ6v36u4P1yow
wKJQh01ZBaW6YUhbPTq2a7Wv+duOQ5r+zWb9vi9bkhQfEaQHx3TWRWe2qPLz+d36dD302e86nF+iMIdNb1/X/7gjS6qzdOdhzfh2sy7p3VLXDGp9yjb3trziMj37/Ra9t2y3Sp3l/7iGndFUfVtH68X55SsSeyVE6fVr+6pZeJByi0r1wfJUvbU4Rek5x//9nciANjG6d3Qn9W9ztI22H8jVxLdXaF9WoZqEORRiD9CezAJJ5SF8yrkdNWFAYo3ap8zp0pPfbNLsX3dJkv51dR+N7l6zYLHzYJ7eWLRTo7rFa3g1+25VSjmUr+tnL9euwwUKD7JpQJsYzd98QDarRa9O6KPz6/AftneX7tYz321WcZnL/e8jpOJQ4scv6qZQP104QZCpQJBp3MqcLj32xQa9t2zPKe9tGRWsEV2aaXD7JuqTGOWez2AYhj5bvU9PfbNJh/LKe1wu7NFct5/TXt1aRFb7es9+v0Wv/LxdkvT0+J66rP/x/8s/nFespTsztWTnIf2247B2Vuy/80eRwYG6c0RHXTOotey2qvNSCkrKlF1YquaR3tvV+dq3l2vh1oP6U8/meuWqPkrNLND5zy9UYanT/b29uWinnvh6kywW6V9X99XwTk31SfJezfplh7tXoX3TUP170sAqO1CvSc3SZa8tUUmZS7cMa6cRneO0JT1Hm9NztTUjV5n5JXK6DJU6DTldhvKKy5R3gsNGA6wWTTvvDN02rL37jWfd3izd/eEa975GJxJiD9A5nZrp/G5x2n24QM//uFWGIXVtHqFZV/c5bofpSvM3Zeimf69098rZrBa9ObGf+40oLbtQ5z+/ULlFZbrn/DPUtUWEbpizUpI05/r+x71hbUnP1YxvN+nnLeW9WaH2AN0yrL1uHNq22qGOQ3nFmvL+Ki3dmangwAC9ObGf+4DX6hSWODXyuQXaVzFhe2DbGD3z5zNPeFispxSUlMlhCzjhXI7fdhzSvR+vc9czuH2s/nr+Ge75P79tP6Tb3ltV8TMepDE9muujlanKLSr/GWgS5lDvxCjFRTgUHxGkuIp/t9sP5mnHgTxtO5Cn1MwC99/TOZ2a6p5RnVRQ4tSN76xUdmGp2jUJ1Ts3DFB8ZJD+l7xXL/+03V3PX887Q3eM6HjS7y8zv0RT3l+l33Ycdl8LD7Lp6zuGnrRdXS5Ds3/bpacrQoTdZtVHtySpVzVD4ctTMnXzuyuVVVCqllHBmnN9f7VrGqa/frRGc9fsV2CARa9f26/aTUj/qNTp0uNfbtB/llb/+/HS3i31z8vO9OrQXl0RZCoQZBo/wzD0xqKdevaHrQqwWNS5ebi6No9Q1xYRslosmr/pgBZvP6iiUleV57WMClavxCgdzC3W8pRMSVKHZmH6x7juJxxGONHXfeLrTXprcYr7f+wD28Zq+a5MLU85rBUpR7QlI7fKcyyW8l/Mlb+Q4yIcSt59RJvTy+9rHRui+0d3VlRwoH7bcVhLdh7W2tQsOQ1DL13RW2PPbOGhVjtq0baDuuat5QoMsGj+tOHuX8yVewpFBgfqgQs662+f/S7DkB6+sItuHHp0OK3U6dLc1fu0JT1Xt5/TQTGh9uO+xqer9mraR2trXFOIPUB9EqPVv02M+raO1ocrU/Xl2vKJsGd1aKJn/tJTH63Yq5d/2qYyl6Fm4Q7dO6qTgu0BKih2Kr+kTAUlTnWKC9dZHZtU6XlZvO2Q7vpgtQ7nlyg8yKa/j+umi85sWeVNeN3eLF3+2lIVljr1l76tVFzm0hdr9yso0Kr3bhyoPonRuvbt5Vq07ZB6JUTpk1uTZAuw6rHP1+udJbvVJMyub+86W03DHUrPLtJz87bok+S9chnlgWjCwETdMaKjmoQ5TtkWRaVO3fJushZsPSi7zarXru570mGN537Yopd+2q4mYXYVlDhVUOJUiD1AD1zQWVcPrH/vTF5xmRZtPahNaTnamJarTWk52pdVqJhQu0Z1i9MF3ZsrqX2sSp0uPf3d
FnfPZavoYE2/tMcJJ36nHMrXpHdWVAn67ZqG6paz2+ni3i1POUcoPbtIL/20TR+uSJWzItEEBlhU6jTUJzFKb07sX+XnsnJ/qmd/2Cp7gFXf3DVUHZqFnfC1N+7P0c3vrtTeI4UKsQfo6T/31NuLU7RqT5Z6tIzUJ7clnbC+PYcLdM8na92/W6JDyuc+NQt36Ms7znIHskpfrN2vez5aqxKnS2cmROnNa/upaXj5z0eZ06U7P1itb35Pl8Nm1X2jOyvEHqBSp0slZS5ZLBb1SojSma0i3YszsgpKNPn9Vfp1+2FZLNI953fSRWe2qPiZKFPKoXzd+8k6OV2G/m98D13eP/Gkbbwvq1DzNqRrwdaDCnXY1Ld1tPokRqtriwivbVJKkKlAkDl9FJc5ZbNaT/i/wsISp37bcUjzNx/Qqt3lAePYn/ygQKvuOLejbhra7rgekZMxDEMPz11/0h6hTnHhSmofq8HtYzWwbexxQ0hOl6FPklP17A9bdTC3uNrXCbUH6Ms7zqoyZ6W+XC5Df3p5sTam5Rw3JFLmdOniV3/V+n057msTBibqiYu71+l/b5U9WC0ig9QpPlydm0eoc3y4moUHKTDAIluAVTarRQ6bVW2ahFb55WgYhj5euVePfbFBhaVOWS1y/w/8Tz2b64mLuysq5PgAVZ307CJNeX+VVlacAda+aaimnNtBY3u2UFp2kS559VcdyivR0I5N9PZ1/WUY0s3vrtQvWw4qIsimy/ol6M3FKXLYyt8E21f8nRSVOjXulV+1JSNXw85oqu4tI/TW4hR3iL6ge7zuG91ZbWs58bm4zKkp76/WvI0ZCgyw6OUr+2h09+OHGHYfztd5zy9USZlL/7q6r7o2j9C9n6zVsoo30wFtY/TY2K7V9jTmFpWqpMyl2GoC1vp92br1P8nuHrjqRIUEKtRuc/d6XDUwUX8b0+Wkez9lF5bqoc9+15GCEl03uK1GdG5W69CVcihfz83b6g6953WN00tX9D7hSkPDMHTd7BVasPWgBrSJ0Qc3Dzru6/20OUOT31utwlKnWseG6PVr+qlTfLj2ZxXqwpcW6UhBqa5Naq2/j+vufk5BSZneW7pHz/+41R0i/zami8b1aqHxs37T1ow8ndkqUh/ekqSgwAAZhqHXFu7UjG83S5JGdYvTC5cfX3Op06Xb/rNKP27KqPb7jwiyaUiHJhrYNkZzftulXYcLFGIP0AuX9zrhkNSrv2zX099tkcNm1dzJQ46bh7Uvq1CfJu/V9xvTq/weOFZQoFU9W0bphrPa1HioraYIMhUIMjiRvOIyrUvN0urULOUWlWnCwMQ6L+N2uQzd9791+iR5r6wWqWuLCPVvE6OBbWPUr01Mjf7XLZUvW3594U69/WuKQuwBSmoXq8Htm2hA2xg98Ok6Ld2ZWe1kUpfL0Ib9Odp5KE+7Dxdo1+F87TlcIEegVT1blf9P7cyEKMVHBMliscjpMpRbVKqv1qXp4bnrFR5k08J7z1H0H3pT1u/L1kWvLJbLkM4+o6nentivXsvxy5yuej1/+4FcTXl/tTan5yoiqPyU9bquHCt1uvTagh16Y1GKsgtLJZUvezYk7T5coC7NI/TRLYPcE5QLS5y6+q1lVQ5AffRPXXXDWW2rvO6W9FyNfWWxe/WXVD6R928XdlGfxOPn+NSm3qkfrtHX69IUYLXoyYu764oBVf8XfeM7K/TjpgMa2rGJ/n3DAFksFrlcht5dulszvt3sXhn1l76tdM/5ndzDq5vScvTOb7s0d80+FZe5dEmvlvrrqE5Vhgg/Wpmqh+euV0mZS80jgzS0YxN1aR6hLs0j1LFZmDal5eqb9Wn6fn26e1J888gg/d/4njr7DN8uv9+UlqNtB/J0YY/mJ126vPdI+RBqQYlTT13SQ1cNPNqeP28+oFveTVaJ06WhHZvo5St7VwnLP285oOtnr5AkvXJVbw3t0FTvLNml2b+m6EhB+c/TH4f1dh/O17iZvyqroFSX
9G6pZ/7cU//vmKGfG4a01UMXdqm25uIyp56bt1Vb0nMVGGCVPcAqW4BFBSVOLU/JdP8cV2oZFaw3J/ardqK4y2Vo0jsr9POWg2rbJFRfTBmi8KBA5RWXadYv2/XGohT3z7HFIvVvHaORXZuppMyl5N1HtDo1S1kV3+szf+5Zq0n0NUGQqUCQgS8YhqGNaTlKjAmp98qcyn+Sx/Z6HMgp0piXFulQXomuHJCo6Zf2cD+242Ce7v14bZVdkqsTHRKoMqeh3D/MQbl/dGfdNvzEGxJ+vmafkncf0T2jOinCx6uOTqSo1KkfN2VoQJuYOu/bcqzcolL9e8luvbU4xb3JYvPIIH12+xDFR1Z9/eyCUl322hJtychVUrtYvXfjwBP2GlROEm7XJFT3X9BZ53eN88gcBKfL0H2frNP/Vu2VVP7G97cxnWULsOqnzRm6Yc5K2awWfTf17OOGSlIzC/T091vcvRUh9gBdPai11u3N0tKdmcd9LbvNqklntdWks9rqnz9s1X+Xl7/ZjujcTM9d3qvaA2zLnC4t35WpfUcKdX63eL8/uf6txSn6x1cbFR5k0/xpw9QsIkgLth7UTf9eqZIyly7s0VwvXtHrhAH8/77brFm/7FCIPUAWSfklTklSYkyIbh/eXpf1Szju5+O37Yd0zdvL5XQZat80VDsO5stikR6+sKsm/SEU14bTZWjd3iwt2nZIi7cdUkSwTTPG9zzlf6SO5JfowpcWaX92kf7Us7nO7thUz/ywxd073L9NtP7ct5VGdIk77rUMw9DOQ/lK3n1EZ3dsety/l/oiyFQgyKCxWLTtoK59e7kMQ3rxil76U88Wmv1rip75fouKy1wKDgxQj5aRah0bojZNQtU6NkR5RWVauzdba1OztCUj1z1/oFL5yoVIzbl+wGl/flXlkMCKXZm6d1SnaveDycwv0fcb0jWmR/OTvkmnZxd5ZUNJwzD00vztev7HrZLKV//887IzNX7Wb9p9uEC3nN1OD47pUu3zk3cf0RNfb9TqY4JvgNWi0d3iNXFwGzlsVj31zSb3cFTlMJ7FIk0beYYmn9PB9FVQnuR0Gbr01V+1dm+2LugerwkDW2vSOytUXObSqG5xeuWqPtXOASlzunTVG8u0fFd5W3WOD9dtw9vrwh7NT/r3/s5vu/TYFxskSQ6bVS9c3ksX9DBvaXXy7iO6/LUlVZbEt4kN0YNjungshNcFQaYCQQaNSeVEzlB7gDo3j3APcwzt2EQzxvesMhTwR4UlTqUcyldQoFWRwYEKDwqs1Xwg+Jev16Xprx+vUVGpS2EOm/KKy9Qs3KGf7hl+ynPIDMPQV+vS9PmafTojLlxXD2qtFsf87BiGoR83HdCMbze59wV68YreGubjISJf2bg/R2NfWSyny3BPEh7ZJU6vTuhzyn8jh/KK9dbiFPVrHa1zOzer0Zu+YRj65w9b9eOmDD15SXe/2MG5clVieJBNd43oqGuT2pj++4EgU4Egg8bE6TI04c2l7qGAUHuAHrqwq64ckOCXyyfhXb/vzdZN/17p3mvlxSuO3+umPsqcLi3cdlDdWkQet8qmsakcJpKkczs306yr+zS6HZVP5fe92UqICa7VxHlvIshUIMigsTmQU6RJ76xU03CHHr+om9fPmoJ/O5BTpMe+2OD+eSDQ1k1RqVN//WitIoJtemzsqTdLhPcRZCoQZAAAaHhq+v7NADkAAGiwCDIAAKDBIsgAAIAGiyADAAAaLIIMAABosAgyAACgwSLIAACABosgAwAAGiyCDAAAaLAIMgAAoMEiyAAAgAaLIAMAABosggwAAGiwCDIAAKDBspldgLcZhiGp/DhwAADQMFS+b1e+j1en0QeZ3NxcSVJCQoLJlQAAgNrKzc1VZGRktY9bjFNFnQbO5XJp//79Cg8Pl8Vi8djr5uTkKCEhQampqYqIiPDY6+LEaG/foa19h7b2HdradzzV1oZhKDc3Vy1atJDVWv1MmEbfI2O1WtWq
VSuvvX5ERAT/KHyI9vYd2tp3aGvfoa19xxNtfbKemEpM9gUAAA0WQQYAADRYBJk6cjgceuyxx+RwOMwu5bRAe/sObe07tLXv0Na+4+u2bvSTfQEAQONFjwwAAGiwCDIAAKDBIsgAAIAGiyADAAAaLIJMHc2cOVNt2rRRUFCQBg4cqOXLl5tdUoM3ffp09e/fX+Hh4WrWrJkuvvhibdmypco9RUVFmjx5smJjYxUWFqbx48crIyPDpIobjxkzZshisWjq1Knua7S15+zbt09XX321YmNjFRwcrB49emjlypXuxw3D0KOPPqrmzZsrODhYI0eO1LZt20ysuGFyOp165JFH1LZtWwUHB6t9+/b6xz/+UeWsHtq6bhYuXKixY8eqRYsWslgsmjt3bpXHa9KumZmZmjBhgiIiIhQVFaVJkyYpLy+v/sUZqLUPPvjAsNvtxttvv21s2LDBuOmmm4yoqCgjIyPD7NIatFGjRhmzZ8821q9fb6xZs8YYM2aMkZiYaOTl5bnvufXWW42EhARj/vz5xsqVK41BgwYZgwcPNrHqhm/58uVGmzZtjJ49exp33XWX+zpt7RmZmZlG69atjeuuu85YtmyZsXPnTuP77783tm/f7r5nxowZRmRkpDF37lxj7dq1xkUXXWS0bdvWKCwsNLHyhufJJ580YmNjja+++spISUkxPv74YyMsLMx48cUX3ffQ1nXzzTffGA899JDx6aefGpKMzz77rMrjNWnX0aNHG2eeeaaxdOlSY9GiRUaHDh2MK6+8st61EWTqYMCAAcbkyZPdnzudTqNFixbG9OnTTayq8Tlw4IAhyViwYIFhGIaRlZVlBAYGGh9//LH7nk2bNhmSjCVLlphVZoOWm5trdOzY0Zg3b54xbNgwd5ChrT3n/vvvN84666xqH3e5XEZ8fLzxzDPPuK9lZWUZDofD+O9//+uLEhuNCy+80LjhhhuqXLv00kuNCRMmGIZBW3vKH4NMTdp148aNhiRjxYoV7nu+/fZbw2KxGPv27atXPQwt1VJJSYmSk5M1cuRI9zWr1aqRI0dqyZIlJlbW+GRnZ0uSYmJiJEnJyckqLS2t0vadO3dWYmIibV9HkydP1oUXXlilTSXa2pO++OIL9evXT3/5y1/UrFkz9e7dW2+88Yb78ZSUFKWnp1dp68jISA0cOJC2rqXBgwdr/vz52rp1qyRp7dq1Wrx4sS644AJJtLW31KRdlyxZoqioKPXr1899z8iRI2W1WrVs2bJ6ff1Gf2ikpx06dEhOp1NxcXFVrsfFxWnz5s0mVdX4uFwuTZ06VUOGDFH37t0lSenp6bLb7YqKiqpyb1xcnNLT002osmH74IMPtGrVKq1YseK4x2hrz9m5c6dmzZqladOm6W9/+5tWrFihO++8U3a7XRMnTnS354l+p9DWtfPAAw8oJydHnTt3VkBAgJxOp5588klNmDBBkmhrL6lJu6anp6tZs2ZVHrfZbIqJial32xNk4JcmT56s9evXa/HixWaX0iilpqbqrrvu0rx58xQUFGR2OY2ay+VSv3799NRTT0mSevfurfXr1+tf//qXJk6caHJ1jctHH32k9957T++//766deumNWvWaOrUqWrRogVt3YgxtFRLTZo0UUBAwHGrNzIyMhQfH29SVY3LlClT9NVXX+nnn39Wq1at3Nfj4+NVUlKirKysKvfT9rWXnJysAwcOqE+fPrLZbLLZbFqwYIFeeukl2Ww2xcXF0dYe0rx5c3Xt2rXKtS5dumjPnj2S5G5PfqfU37333qsHHnhAV1xxhXr06KFrrrlGd999t6ZPny6JtvaWmrRrfHy8Dhw4UOXxsrIyZWZm1rvtCTK1ZLfb1bdvX82fP999zeVyaf78+UpKSjKxsobPMAxNmTJFn332mX766Se1bdu2yuN9+/ZVYGBglbbfsmWL9uzZQ9vX0ogRI/T7779rzZo17o9+/fppwoQJ7j/T1p4xZMiQ47YR2Lp1q1q3bi1J
atu2reLj46u0dU5OjpYtW0Zb11JBQYGs1qpvawEBAXK5XJJoa2+pSbsmJSUpKytLycnJ7nt++uknuVwuDRw4sH4F1Guq8Gnqgw8+MBwOhzFnzhxj48aNxs0332xERUUZ6enpZpfWoN12221GZGSk8csvvxhpaWnuj4KCAvc9t956q5GYmGj89NNPxsqVK42kpCQjKSnJxKobj2NXLRkGbe0py5cvN2w2m/Hkk08a27ZtM9577z0jJCTE+M9//uO+Z8aMGUZUVJTx+eefG+vWrTPGjRvHkuA6mDhxotGyZUv38utPP/3UaNKkiXHfffe576Gt6yY3N9dYvXq1sXr1akOS8dxzzxmrV682du/ebRhGzdp19OjRRu/evY1ly5YZixcvNjp27MjyazO9/PLLRmJiomG3240BAwYYS5cuNbukBk/SCT9mz57tvqewsNC4/fbbjejoaCMkJMS45JJLjLS0NPOKbkT+GGRoa8/58ssvje7duxsOh8Po3Lmz8frrr1d53OVyGY888ogRFxdnOBwOY8SIEcaWLVtMqrbhysnJMe666y4jMTHRCAoKMtq1a2c89NBDRnFxsfse2rpufv755xP+fp44caJhGDVr18OHDxtXXnmlERYWZkRERBjXX3+9kZubW+/aLIZxzJaHAAAADQhzZAAAQINFkAEAAA0WQQYAADRYBBkAANBgEWQAAECDRZABAAANFkEGAAA0WAQZAADQYBFkAHhMmzZt9MILL9T4/l9++UUWi+W4wykbq9q2D4BTs5ldAADzDB8+XL169fLYm+uKFSsUGhpa4/sHDx6stLQ0RUZGeuTrAzj9EGQAnJRhGHI6nbLZTv3romnTprV6bbvdrvj4+LqWBgAMLQGnq+uuu04LFizQiy++KIvFIovFol27drmHe7799lv17dtXDodDixcv1o4dOzRu3DjFxcUpLCxM/fv3148//ljlNf84dGKxWPTmm2/qkksuUUhIiDp27KgvvvjC/fgfh5bmzJmjqKgoff/99+rSpYvCwsI0evRopaWluZ9TVlamO++8U1FRUYqNjdX999+viRMn6uKLLz7p97t48WINHTpUwcHBSkhI0J133qn8/Pwqtf/jH//QlVdeqdDQULVs2VIzZ86s8hp79uzRuHHjFBYWpoiICF122WXKyMiocs+XX36p/v37KygoSE2aNNEll1xS5fGCggLdcMMNCg8PV2Jiol5//fWT1g3g5AgywGnqxRdfVFJSkm666SalpaUpLS1NCQkJ7scfeOABzZgxQ5s2bVLPnj2Vl5enMWPGaP78+Vq9erVGjx6tsWPHas+ePSf9Oo8//rguu+wyrVu3TmPGjNGECROUmZlZ7f0FBQV69tln9e6772rhwoXas2eP7rnnHvfj//d//6f33ntPs2fP1q+//qqcnBzNnTv3pDXs2LFDo0eP1vjx47Vu3Tp9+OGHWrx4saZMmVLlvmeeeUZnnnmmVq9erQceeEB33XWX5s2bJ0lyuVwaN26cMjMztWDBAs2bN087d+7U5Zdf7n7+119/rUsuuURjxozR6tWrNX/+fA0YMKDK1/jnP/+pfv36afXq1br99tt12223acuWLSetH8BJ1Pv8bAAN1rBhw4y77rqryrWff/7ZkGTMnTv3lM/v1q2b8fLLL7s/b926tfH888+7P5dkPPzww+7P8/LyDEnGt99+W+VrHTlyxDAMw5g9e7Yhydi+fbv7OTNnzjTi4uLcn8fFxRnPPPOM+/OysjIjMTHRGDduXLV1Tpo0ybj55purXFu0aJFhtVqNwsJCd+2jR4+ucs/ll19uXHDBBYZhGMYPP/xgBAQEGHv27HE/vmHDBkOSsXz5csMwDCMpKcmYMGFCtXW0bt3auPrqq92fu1wuo1mzZsasWbOqfQ6Ak6NHBsAJ9evXr8rneXl5uueee9SlSxdFRUUpLCxMmzZtOmWPTM+ePd1/Dg0NVUREhA4cOFDt/SEhIWrfvr378+bNm7vv
z87OVkZGRpVejoCAAPXt2/ekNaxdu1Zz5sxRWFiY+2PUqFFyuVxKSUlx35eUlFTleUlJSdq0aZMkadOmTUpISKjSa9W1a1dFRUW571mzZo1GjBhx0lqObQ+LxaL4+PiTtgeAk2OyL4AT+uPqo3vuuUfz5s3Ts88+qw4dOig4OFh//vOfVVJSctLXCQwMrPK5xWKRy+Wq1f2GYdSy+qry8vJ0yy236M477zzuscTExHq99rGCg4NPeU9t2wPAydEjA5zG7Ha7nE5nje799ddfdd111+mSSy5Rjx49FB8fr127dnm3wD+IjIxUXFycVqxY4b7mdDq1atWqkz6vT58+2rhxozp06HDch91ud9+3dOnSKs9bunSpunTpIknq0qWLUlNTlZqa6n5848aNysrKUteuXSWV97bMnz+/3t8ngJqjRwY4jbVp00bLli3Trl27FBYWppiYmGrv7dixoz799FONHTtWFotFjzzyiCk9CXfccYemT5+uDh06qHPnznr55Zd15MgRWSyWap9z//33a9CgQZoyZYpuvPFGhYaGauPGjZo3b55eeeUV932//vqrnn76aV188cWaN2+ePv74Y3399deSpJEjR6pHjx6aMGGCXnjhBZWVlen222/XsGHD3MNwjz32mEaMGKH27dvriiuuUFlZmb755hvdf//93m0U4DRGjwxwGrvnnnsUEBCgrl27qmnTpied7/Lcc88pOjpagwcP1tixYzVq1Cj16dPHh9WWu//++3XllVfq2muvVVJSknu+S1BQULXP6dmzpxYsWKCtW7dq6NCh6t27tx599FG1aNGiyn1//etftXLlSvXu3VtPPPGEnnvuOY0aNUpS+RDQ559/rujoaJ199tkaOXKk2rVrpw8//ND9/OHDh+vjjz/WF198oV69euncc8/V8uXLvdMQACRJFqO+g88AYCKXy6UuXbrosssu0z/+8Y86v06bNm00depUTZ061XPFAfA6hpYANCi7d+/WDz/8oGHDhqm4uFivvPKKUlJSdNVVV5ldGgATMLQEoEGxWq2aM2eO+vfvryFDhuj333/Xjz/+6J6UC+D0wtASAABosOiRAQAADRZBBgAANFgEGQAA0GARZAAAQINFkAEAAA0WQQYAADRYBBkAANBgEWQAAECD9f8Bo5EXoJMILKsAAAAASUVORK5CYII=",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Prepare the word-frequency distribution used for negative sampling in contrastive learning\n",
    "vocab_size = len(dataset.token2id)\n",
    "embed_size = 128\n",
    "distribution = dataset.get_word_distribution()\n",
    "print(distribution)\n",
    "model = SkipGramNCE(vocab_size, embed_size, distribution)\n",
    "\n",
    "from torch.utils.data import DataLoader\n",
    "from torch.optim import SGD, Adam\n",
    "\n",
    "# collate_batch is a classmethod that converts a list of samples into the\n",
    "# tensors PyTorch needs\n",
    "class DataCollator:\n",
    "    @classmethod\n",
    "    def collate_batch(cls, batch):\n",
    "        # Column 0 of every sample becomes input_ids, column 1 becomes labels\n",
    "        # (presumably (center word id, context word id) pairs; confirm against `data`)\n",
    "        batch = np.array(batch)\n",
    "        input_ids = torch.tensor(batch[:, 0], dtype=torch.long)\n",
    "        labels = torch.tensor(batch[:, 1], dtype=torch.long)\n",
    "        return {'input_ids': input_ids, 'labels': labels}\n",
    "\n",
    "# Training hyperparameters and the list that collects one loss value per epoch\n",
    "epochs = 100\n",
    "batch_size = 128\n",
    "learning_rate = 1e-3\n",
    "epoch_loss = []\n",
    "\n",
    "data_collator = DataCollator()\n",
    "dataloader = DataLoader(data, batch_size=batch_size, shuffle=True,\n",
    "    collate_fn=data_collator.collate_batch)\n",
    "optimizer = Adam(model.parameters(), lr=learning_rate)\n",
    "model.zero_grad()\n",
    "model.train()\n",
    "\n",
    "# tqdm must be installed beforehand\n",
    "from tqdm import trange\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Training loop: for every batch, compute the loss with the model,\n",
    "# back-propagate, and let the optimizer update the parameters\n",
    "with trange(epochs, desc='epoch', ncols=60) as pbar:\n",
    "    for epoch in pbar:\n",
    "        batch_losses = []\n",
    "        for batch in dataloader:\n",
    "            loss = model(**batch)\n",
    "            loss_value = loss.item()\n",
    "            pbar.set_description(f'epoch-{epoch}, loss={loss_value:.4f}')\n",
    "            loss.backward()\n",
    "            optimizer.step()\n",
    "            model.zero_grad()\n",
    "            batch_losses.append(loss_value)\n",
    "        # Record the mean over all batches of this epoch; the loss of the last\n",
    "        # batch alone is a single noisy sample and makes the curve jittery\n",
    "        epoch_loss.append(np.mean(batch_losses))\n",
    "    \n",
    "epoch_loss = np.array(epoch_loss)\n",
    "plt.plot(range(len(epoch_loss)), epoch_loss)\n",
    "plt.xlabel('training epoch')\n",
    "plt.ylabel('loss')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c9430e9a",
   "metadata": {},
   "source": [
    "### TF-IDF加权\n",
    "\n",
    "首先定义词频率（term frequency），即一个词在一篇文档中出现的次数。不同长度的文章，词频率会有较大差距，不利于比较和运算，因此可以对词频率取对数。\n",
    "\n",
    "$$\\text{tf}_{t,d} = \\log (\\text{count}(t,d) + 1)$$\n",
    "\n",
    "其中$\\text{count}(t,d)$表示词$t$在文档$d$中出现的次数，为了避免对0取对数，把所有的计数加1。\n",
    "\n",
    "那么如何区分高频词与低频词呢？TF-IDF引入了另一个重要的评价指标——文档频率（document frequency），即语料库中有多少篇文档包含该词。在所有文档里出现的词往往是虚词或是常见实词，而只在少量文档里出现的词往往是具有明确含义的实词并且具有很强的文档区分度。用$\\text{df}_t$来表示在多少篇文档中出现了词$t$。\n",
    "\n",
    "为了压低高频词和提升低频词的影响，TF-IDF使用文档频率的倒数，也就是逆向文档频率（inverse document frequency）来对词频率进行加权。这很好理解，一个词的文档频率越高，其倒数就越小，权重就越小。\n",
    "\n",
    "$$\\text{idf}_t = \\log \\frac{N}{\\text{df}_t}$$\n",
    "\n",
    "其中$N$表示文档总数。为了避免分母为0，通常会将分母改为$\\text{df}_t+1$。\n",
    "\n",
    "基于词频率和逆向文档频率，得到TF-IDF的最终值为：\n",
    "\n",
    "$$w_{t,d} = \\text{tf}_{t,d} \\times \\text{idf}_{t}$$\n"
   ]
  },
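  {
   "cell_type": "markdown",
   "id": "f765e353",
   "metadata": {},
   "source": [
    "很多情况下会额外对文档的TF-IDF向量使用L2归一化，使得不同文档的TF-IDF向量具有相同的模长，便于相互比较。\n",
    "下面给出了TF-IDF的代码实现。注意，代码在默认的平滑设置（`smooth_idf=True`）下计算的是$\\text{idf}_t = \\log \\frac{N+1}{\\text{df}_t+1} + 1$，即分子、分母同时加1，并在对数之外再加1，与上面的基本公式略有不同。"
   ]
  },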
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "9ce8e610",
   "metadata": {},
   "outputs": [],
   "source": [
    "class TFIDF:\n",
    "    \"\"\"TF-IDF vectorizer for documents given as sequences of integer token ids.\n",
    "\n",
    "    Every token id is used directly as a column index, so ids must be valid\n",
    "    indices into an axis of length ``vocab_size``.\n",
    "\n",
    "    Args:\n",
    "        vocab_size: number of entries in the idf vector and number of columns\n",
    "            in the matrix returned by ``transform``.\n",
    "        norm: row normalization applied by ``transform``. 'l1' divides each\n",
    "            row by the sum of its absolute values; any other truthy value\n",
    "            (default 'l2') divides by the Euclidean norm; a falsy value\n",
    "            disables normalization.\n",
    "        smooth_idf: if true, add one to the document count and to every\n",
    "            document frequency before computing idf.\n",
    "        sublinear_tf: if true, replace raw counts with log(count + 1).\n",
    "    \"\"\"\n",
    "    def __init__(self, vocab_size, norm='l2', smooth_idf=True,\n",
    "                 sublinear_tf=True):\n",
    "        self.vocab_size = vocab_size\n",
    "        self.norm = norm\n",
    "        self.smooth_idf = smooth_idf\n",
    "        self.sublinear_tf = sublinear_tf\n",
    "    \n",
    "    def fit(self, X):\n",
    "        \"\"\"Compute ``self.idf`` from the corpus ``X``.\n",
    "\n",
    "        Args:\n",
    "            X: sized collection of documents, each an iterable of token ids.\n",
    "\n",
    "        Raises:\n",
    "            ValueError: if ``X`` is empty and ``smooth_idf`` is disabled.\n",
    "        \"\"\"\n",
    "        doc_freq = np.zeros(self.vocab_size, dtype=np.float64)\n",
    "        for data in X:\n",
    "            # set() makes every token count at most once per document\n",
    "            for token_id in set(data):\n",
    "                doc_freq[token_id] += 1\n",
    "        smoothing = int(self.smooth_idf)\n",
    "        n_samples = len(X) + smoothing\n",
    "        if n_samples == 0:\n",
    "            raise ValueError('cannot fit TFIDF on an empty corpus when smooth_idf is disabled')\n",
    "        doc_freq += smoothing\n",
    "        # Without smoothing, ids that occur in no document have df == 0;\n",
    "        # dividing by it yields idf = inf, and transform() would then compute\n",
    "        # 0 * inf = NaN for every row. Clamping df to 1 is a no-op when\n",
    "        # smoothing is on and gives such ids the largest finite idf otherwise.\n",
    "        doc_freq = np.maximum(doc_freq, 1)\n",
    "        self.idf = np.log(n_samples / doc_freq) + 1\n",
    "    \n",
    "    def transform(self, X):\n",
    "        \"\"\"Return the TF-IDF matrix of ``X`` using the idf learned by ``fit``.\n",
    "\n",
    "        Args:\n",
    "            X: sized collection of documents, each an iterable of token ids.\n",
    "\n",
    "        Returns:\n",
    "            float64 array of shape (len(X), vocab_size); rows are normalized\n",
    "            according to ``norm``, and all-zero rows are left as zeros.\n",
    "        \"\"\"\n",
    "        assert hasattr(self, 'idf'), 'TFIDF.fit() must be called before transform()'\n",
    "        term_freq = np.zeros((len(X), self.vocab_size), dtype=np.float64)\n",
    "        for i, data in enumerate(X):\n",
    "            for token in data:\n",
    "                term_freq[i, token] += 1\n",
    "        if self.sublinear_tf:\n",
    "            term_freq = np.log(term_freq + 1)\n",
    "        Y = term_freq * self.idf\n",
    "        if self.norm:\n",
    "            if self.norm == 'l1':\n",
    "                row_norm = np.abs(Y).sum(axis=1)\n",
    "            else:\n",
    "                row_norm = np.sqrt((Y**2).sum(axis=1))\n",
    "            # Keep all-zero rows (e.g. empty documents) at zero instead of 0/0\n",
    "            row_norm[row_norm == 0] = 1\n",
    "            Y /= row_norm[:, None]\n",
    "        return Y\n",
    "    \n",
    "    def fit_transform(self, X):\n",
    "        \"\"\"Fit the idf weights on ``X`` and return its TF-IDF matrix.\"\"\"\n",
    "        self.fit(X)\n",
    "        return self.transform(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9379c8b3-33b8-46af-a935-4f09eb35eb4d",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "sun",
   "language": "python",
   "name": "sun"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
